{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "原文代码作者：https://github.com/wzyonggege/statistical-learning-method\n",
    "\n",
    "中文注释制作：机器学习初学者(微信公众号：ID:ai-start-com)\n",
    "\n",
    "配置环境：python 3.6\n",
    "\n",
    "代码全部测试通过。\n",
    "![gongzhong](../gongzhong.jpg)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#  第3章 k近邻法"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 距离度量"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import math\n",
    "from itertools import combinations"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- p = 1 曼哈顿距离\n",
    "- p = 2 欧氏距离\n",
    "- p = inf  闵式距离minkowski_distance "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def L(x, y, p=2):\n",
    "    # x1 = [1, 1], x2 = [5,1]\n",
    "    if len(x) == len(y) and len(x) > 1:\n",
    "        sum = 0\n",
    "        for i in range(len(x)):\n",
    "            sum += math.pow(abs(x[i] - y[i]), p)\n",
    "        return math.pow(sum, 1/p)\n",
    "    else:\n",
    "        return 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 课本例3.1\n",
    "x1 = [1, 1]\n",
    "x2 = [5, 1]\n",
    "x3 = [4, 4]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(4.0, '1-[5, 1]')\n",
      "(4.0, '1-[5, 1]')\n",
      "(3.7797631496846193, '1-[4, 4]')\n",
      "(3.5676213450081633, '1-[4, 4]')\n"
     ]
    }
   ],
   "source": [
    "# x1, x2\n",
    "for i in range(1, 5):\n",
    "    r = { '1-{}'.format(c):L(x1, c, p=i) for c in [x2, x3]}\n",
    "    print(min(zip(r.values(), r.keys())))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "python实现，遍历所有数据点，找出n个距离最近的点的分类情况，少数服从多数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "\n",
    "from sklearn.datasets import load_iris\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "from collections import Counter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# data\n",
    "iris = load_iris()\n",
    "df = pd.DataFrame(iris.data, columns=iris.feature_names)\n",
    "df['label'] = iris.target\n",
    "df.columns = ['sepal length', 'sepal width', 'petal length', 'petal width', 'label']\n",
    "# data = np.array(df.iloc[:100, [0, 1, -1]])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sepal length</th>\n",
       "      <th>sepal width</th>\n",
       "      <th>petal length</th>\n",
       "      <th>petal width</th>\n",
       "      <th>label</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>5.1</td>\n",
       "      <td>3.5</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4.9</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4.7</td>\n",
       "      <td>3.2</td>\n",
       "      <td>1.3</td>\n",
       "      <td>0.2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4.6</td>\n",
       "      <td>3.1</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5.0</td>\n",
       "      <td>3.6</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>5.4</td>\n",
       "      <td>3.9</td>\n",
       "      <td>1.7</td>\n",
       "      <td>0.4</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>4.6</td>\n",
       "      <td>3.4</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>5.0</td>\n",
       "      <td>3.4</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>4.4</td>\n",
       "      <td>2.9</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>4.9</td>\n",
       "      <td>3.1</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>5.4</td>\n",
       "      <td>3.7</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>4.8</td>\n",
       "      <td>3.4</td>\n",
       "      <td>1.6</td>\n",
       "      <td>0.2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>4.8</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>4.3</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.1</td>\n",
       "      <td>0.1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>5.8</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.2</td>\n",
       "      <td>0.2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>5.7</td>\n",
       "      <td>4.4</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.4</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>5.4</td>\n",
       "      <td>3.9</td>\n",
       "      <td>1.3</td>\n",
       "      <td>0.4</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>5.1</td>\n",
       "      <td>3.5</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>5.7</td>\n",
       "      <td>3.8</td>\n",
       "      <td>1.7</td>\n",
       "      <td>0.3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>5.1</td>\n",
       "      <td>3.8</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>5.4</td>\n",
       "      <td>3.4</td>\n",
       "      <td>1.7</td>\n",
       "      <td>0.2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>5.1</td>\n",
       "      <td>3.7</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.4</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>4.6</td>\n",
       "      <td>3.6</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>5.1</td>\n",
       "      <td>3.3</td>\n",
       "      <td>1.7</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>4.8</td>\n",
       "      <td>3.4</td>\n",
       "      <td>1.9</td>\n",
       "      <td>0.2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>5.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.6</td>\n",
       "      <td>0.2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>5.0</td>\n",
       "      <td>3.4</td>\n",
       "      <td>1.6</td>\n",
       "      <td>0.4</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>5.2</td>\n",
       "      <td>3.5</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>5.2</td>\n",
       "      <td>3.4</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>4.7</td>\n",
       "      <td>3.2</td>\n",
       "      <td>1.6</td>\n",
       "      <td>0.2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>120</th>\n",
       "      <td>6.9</td>\n",
       "      <td>3.2</td>\n",
       "      <td>5.7</td>\n",
       "      <td>2.3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>121</th>\n",
       "      <td>5.6</td>\n",
       "      <td>2.8</td>\n",
       "      <td>4.9</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>122</th>\n",
       "      <td>7.7</td>\n",
       "      <td>2.8</td>\n",
       "      <td>6.7</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>123</th>\n",
       "      <td>6.3</td>\n",
       "      <td>2.7</td>\n",
       "      <td>4.9</td>\n",
       "      <td>1.8</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>124</th>\n",
       "      <td>6.7</td>\n",
       "      <td>3.3</td>\n",
       "      <td>5.7</td>\n",
       "      <td>2.1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>125</th>\n",
       "      <td>7.2</td>\n",
       "      <td>3.2</td>\n",
       "      <td>6.0</td>\n",
       "      <td>1.8</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>126</th>\n",
       "      <td>6.2</td>\n",
       "      <td>2.8</td>\n",
       "      <td>4.8</td>\n",
       "      <td>1.8</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>127</th>\n",
       "      <td>6.1</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4.9</td>\n",
       "      <td>1.8</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>128</th>\n",
       "      <td>6.4</td>\n",
       "      <td>2.8</td>\n",
       "      <td>5.6</td>\n",
       "      <td>2.1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>129</th>\n",
       "      <td>7.2</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.8</td>\n",
       "      <td>1.6</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>130</th>\n",
       "      <td>7.4</td>\n",
       "      <td>2.8</td>\n",
       "      <td>6.1</td>\n",
       "      <td>1.9</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>131</th>\n",
       "      <td>7.9</td>\n",
       "      <td>3.8</td>\n",
       "      <td>6.4</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>132</th>\n",
       "      <td>6.4</td>\n",
       "      <td>2.8</td>\n",
       "      <td>5.6</td>\n",
       "      <td>2.2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>133</th>\n",
       "      <td>6.3</td>\n",
       "      <td>2.8</td>\n",
       "      <td>5.1</td>\n",
       "      <td>1.5</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>134</th>\n",
       "      <td>6.1</td>\n",
       "      <td>2.6</td>\n",
       "      <td>5.6</td>\n",
       "      <td>1.4</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135</th>\n",
       "      <td>7.7</td>\n",
       "      <td>3.0</td>\n",
       "      <td>6.1</td>\n",
       "      <td>2.3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>136</th>\n",
       "      <td>6.3</td>\n",
       "      <td>3.4</td>\n",
       "      <td>5.6</td>\n",
       "      <td>2.4</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>137</th>\n",
       "      <td>6.4</td>\n",
       "      <td>3.1</td>\n",
       "      <td>5.5</td>\n",
       "      <td>1.8</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>138</th>\n",
       "      <td>6.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4.8</td>\n",
       "      <td>1.8</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>139</th>\n",
       "      <td>6.9</td>\n",
       "      <td>3.1</td>\n",
       "      <td>5.4</td>\n",
       "      <td>2.1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>140</th>\n",
       "      <td>6.7</td>\n",
       "      <td>3.1</td>\n",
       "      <td>5.6</td>\n",
       "      <td>2.4</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>141</th>\n",
       "      <td>6.9</td>\n",
       "      <td>3.1</td>\n",
       "      <td>5.1</td>\n",
       "      <td>2.3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>142</th>\n",
       "      <td>5.8</td>\n",
       "      <td>2.7</td>\n",
       "      <td>5.1</td>\n",
       "      <td>1.9</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>143</th>\n",
       "      <td>6.8</td>\n",
       "      <td>3.2</td>\n",
       "      <td>5.9</td>\n",
       "      <td>2.3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>144</th>\n",
       "      <td>6.7</td>\n",
       "      <td>3.3</td>\n",
       "      <td>5.7</td>\n",
       "      <td>2.5</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>145</th>\n",
       "      <td>6.7</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.2</td>\n",
       "      <td>2.3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>146</th>\n",
       "      <td>6.3</td>\n",
       "      <td>2.5</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1.9</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>147</th>\n",
       "      <td>6.5</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.2</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>148</th>\n",
       "      <td>6.2</td>\n",
       "      <td>3.4</td>\n",
       "      <td>5.4</td>\n",
       "      <td>2.3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149</th>\n",
       "      <td>5.9</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.1</td>\n",
       "      <td>1.8</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>150 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     sepal length  sepal width  petal length  petal width  label\n",
       "0             5.1          3.5           1.4          0.2      0\n",
       "1             4.9          3.0           1.4          0.2      0\n",
       "2             4.7          3.2           1.3          0.2      0\n",
       "3             4.6          3.1           1.5          0.2      0\n",
       "4             5.0          3.6           1.4          0.2      0\n",
       "5             5.4          3.9           1.7          0.4      0\n",
       "6             4.6          3.4           1.4          0.3      0\n",
       "7             5.0          3.4           1.5          0.2      0\n",
       "8             4.4          2.9           1.4          0.2      0\n",
       "9             4.9          3.1           1.5          0.1      0\n",
       "10            5.4          3.7           1.5          0.2      0\n",
       "11            4.8          3.4           1.6          0.2      0\n",
       "12            4.8          3.0           1.4          0.1      0\n",
       "13            4.3          3.0           1.1          0.1      0\n",
       "14            5.8          4.0           1.2          0.2      0\n",
       "15            5.7          4.4           1.5          0.4      0\n",
       "16            5.4          3.9           1.3          0.4      0\n",
       "17            5.1          3.5           1.4          0.3      0\n",
       "18            5.7          3.8           1.7          0.3      0\n",
       "19            5.1          3.8           1.5          0.3      0\n",
       "20            5.4          3.4           1.7          0.2      0\n",
       "21            5.1          3.7           1.5          0.4      0\n",
       "22            4.6          3.6           1.0          0.2      0\n",
       "23            5.1          3.3           1.7          0.5      0\n",
       "24            4.8          3.4           1.9          0.2      0\n",
       "25            5.0          3.0           1.6          0.2      0\n",
       "26            5.0          3.4           1.6          0.4      0\n",
       "27            5.2          3.5           1.5          0.2      0\n",
       "28            5.2          3.4           1.4          0.2      0\n",
       "29            4.7          3.2           1.6          0.2      0\n",
       "..            ...          ...           ...          ...    ...\n",
       "120           6.9          3.2           5.7          2.3      2\n",
       "121           5.6          2.8           4.9          2.0      2\n",
       "122           7.7          2.8           6.7          2.0      2\n",
       "123           6.3          2.7           4.9          1.8      2\n",
       "124           6.7          3.3           5.7          2.1      2\n",
       "125           7.2          3.2           6.0          1.8      2\n",
       "126           6.2          2.8           4.8          1.8      2\n",
       "127           6.1          3.0           4.9          1.8      2\n",
       "128           6.4          2.8           5.6          2.1      2\n",
       "129           7.2          3.0           5.8          1.6      2\n",
       "130           7.4          2.8           6.1          1.9      2\n",
       "131           7.9          3.8           6.4          2.0      2\n",
       "132           6.4          2.8           5.6          2.2      2\n",
       "133           6.3          2.8           5.1          1.5      2\n",
       "134           6.1          2.6           5.6          1.4      2\n",
       "135           7.7          3.0           6.1          2.3      2\n",
       "136           6.3          3.4           5.6          2.4      2\n",
       "137           6.4          3.1           5.5          1.8      2\n",
       "138           6.0          3.0           4.8          1.8      2\n",
       "139           6.9          3.1           5.4          2.1      2\n",
       "140           6.7          3.1           5.6          2.4      2\n",
       "141           6.9          3.1           5.1          2.3      2\n",
       "142           5.8          2.7           5.1          1.9      2\n",
       "143           6.8          3.2           5.9          2.3      2\n",
       "144           6.7          3.3           5.7          2.5      2\n",
       "145           6.7          3.0           5.2          2.3      2\n",
       "146           6.3          2.5           5.0          1.9      2\n",
       "147           6.5          3.0           5.2          2.0      2\n",
       "148           6.2          3.4           5.4          2.3      2\n",
       "149           5.9          3.0           5.1          1.8      2\n",
       "\n",
       "[150 rows x 5 columns]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.legend.Legend at 0x1c32007e240>"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEKCAYAAAD9xUlFAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3X+YXVV97/H312EkUw3MBcZKMsFBoLlCoISMIMaLCrbRkIYUEMKjthEu3HpR8KHiI9YixlbQWKWU52IJWFG4gZhi+KFAERp/8CM4IZhoYhQqNjNwSwgmgAZIhu/9Y+85mTk5M3P2nLPO2Wufz+t55pnZ++yz57v2gflm77W+a5m7IyIiAvCaZgcgIiL5oaQgIiIlSgoiIlKipCAiIiVKCiIiUqKkICIiJUoKIiJSoqQgIiIlSgoiIlKyV+hfYGZtQB8w4O7zyl5bBCwBBtJdV7v7dWOd74ADDvCenp4AkYqIFNeaNWuedfeu8Y4LnhSAC4GNwD6jvH6Lu3+02pP19PTQ19dXl8BERFqFmf2mmuOCPj4ys27gZGDMf/2LiEg+hO5TuBL4JPDqGMecZmbrzGyFmU2rdICZnWdmfWbWt2XLliCBiohIwKRgZvOAZ9x9zRiH3QH0uPtRwPeBGyod5O7Xunuvu/d2dY37SExERCYoZJ/CbGC+mc0FJgH7mNmN7v7BoQPcfeuw45cCXwwYj4hITXbu3El/fz8vvfRSs0MZ1aRJk+ju7qa9vX1C7w+WFNz9EuASADN7F/CJ4Qkh3X+guz+dbs4n6ZAWEcml/v5+Jk+eTE9PD2bW7HD24O5s3bqV/v5+Dj744Amdo+F1Cma22Mzmp5sXmNnPzeynwAXAokbHIyJSrZdeeon9998/lwkBwMzYf//9a7qTacSQVNx9FbAq/fnSYftLdxMiRbNy7QBL7tnEU9t2MKWzg4vnTGfBzKnNDktqlNeEMKTW+BqSFERazcq1A1xy63p27BwEYGDbDi65dT2AEoPkmqa5EAlgyT2bSglhyI6dgyy5Z1OTIpKiuPvuu5k+fTqHHnooV1xxRd3Pr6QgEsBT23Zk2i9SjcHBQc4//3zuuusuNmzYwLJly9iwYUNdf4ceH4kEMKWzg4EKCWBKZ0cTopFmqXe/0iOPPMKhhx7Km9/8ZgAWLlzIbbfdxuGHH16vkHWnIBLCxXOm09HeNmJfR3sbF8+Z3qSIpNGG+pUGtu3A2d2vtHLtwLjvHc3AwADTpu2e+KG7u5uBgYmfrxIlBZEAFsycyuWnHsnUzg4MmNrZweWnHqlO5hYSol/J3ffYV+/RUHp8JBLIgplTlQRaWIh+pe7ubjZv3lza7u/vZ8qUKRM+XyW6UxARCWC0/qNa+pXe+ta38qtf/Ypf//rXvPLKK9x8883Mnz9//DdmoKQgIhJAiH6lvfbai6uvvpo5c+bwlre8hTPOOIMjjjii1lBH/o66nk1ERIDdRYr1rmqfO3cuc+fOrUeIFSkpiIgEEmO/kh4fiYhIiZKCiIiUKCmIiEiJkoKIiJQoKYiISImSgrS8lWsHmH3F/Rz8qe8y+4r7a5qbRiS0s88+mze84Q3MmDEjyPmVFKSlhZi0TCSkRYsWcffddwc7v5KCtDQthiNBrVsOX50Bl3Um39ctr/mUJ5xwAvvtt18dgqtMxWvS0rQYjgSzbjnccQHsTP9b2r452QY46ozmxTUO3SlISwsxaZkIAPct3p0QhuzckezPMSUFaWlaDEeC2d6fbX9O6PGRtLRQk5aJsG938sio0v4cU1KQlhfjpGUSgZMuHdmnANDekeyvwVlnncWqVat49tln6e7u5nOf+xznnHNOjcHupqQgTVPvRc1FcmWoM/m+xckjo327k4RQYyfzsmXL6hDc6JQUpCmG6gOGhoMO1QcASgxSHEedkeuRRpWoo1maQvUBIvmkpCBNofoAiZW7NzuEMdUan5KCNIXqAyRGkyZNYuvWrblNDO7O1q1bmTRp0oTPoT4FaYqL50wf0acAqg+Q/Ovu7qa/v58tW7Y0O5RRTZo0ie7uiQ97VVKQplB9gMSovb2dgw8+uNlhBBU8KZhZG9AHDLj7vLLX9ga+CcwCtgJnuvuToWOSfFB9gEj+NOJO4UJgI7BPhdfOAX7r7oea2ULgi8CZDYhJJFdUsyF5EbSj2cy6gZOB60Y55BTghvTnFcBJZmYhYxLJG63pIHkSevTRlcAngVdHeX0qsBnA3XcB24H9A8ckkiuq2ZA8CZYUzGwe8Iy7rxnrsAr79hjrZWbnmVmfmfXluddfZCJUsyF5EvJOYTYw38yeBG4GTjSzG8uO6QemAZjZXsC+wHPlJ3L3a9291917u7q6AoYs0niq2ZA8CZYU3P0Sd+929x5gIXC/u3+w7LDbgb9Mfz49PSafVSEigWhNB8mThtcpmNlioM/dbweuB75lZo+T3CEsbHQ8Is2mmg3JE4vtH+a9vb3e19fX7DBERKJiZmvcvXe841TRLIXzmZXrWbZ6M4PutJlx1nHT+LsFRzY7LJEoKClIoXxm5XpufPg/S9uD7qVtJQaR8WmWVCmUZasrrIk7xn4RGUlJQQplcJQ+stH2i8hISgpSKG2jzJIy2n4RGUlJQQrlrOOmZdovIiOpo1kKZagzWaOPRCZGdQoiIi1AdQrSFB9Y+hAPPLF7+qrZh+zHTece38SImkdrJEiM1KcgdVOeEAAeeOI5PrD0oSZF1DxaI0FipaQgdVOeEMbbX2RaI0FipaQgEoDWSJBYKSmIBKA1EiRWSgpSN7MP2S/T/iLTGgkSKyUFqZubzj1+jwTQqqOPFsycyuWnHsnUzg4MmNrZweWnHqnRR5J7qlMQEWkBqlOQpgg1Nj/LeVUfIDJxSgpSN0Nj84eGYg6NzQdq+qOc5byhYhBpFepTkLoJNTY/y3lVHyBSGyUFqZtQY/OznFf1ASK1UVKQugk1Nj/LeVUfIFIbJQWpm1Bj87OcV/UBIrVRR7PUzVBHbr1H/mQ5b6gYRFqF6hRERFqA6hRyKsYx9DHGLCITo6TQQDGOoY8xZhGZOHU0N1CMY+hjjFlEJk5JoYFiHEMfY8wiMnFKCg0U4xj6GGMWkYlTUmigGMfQxxiziEycOpobKMYx9DHGLCITF6xOwcwmAT8E9iZJPivc/bNlxywClgAD6a6r3f26sc6rOgURkezyUKfwMnCiu79oZu3Aj83sLnd/uOy4W9z9owHjkBp9ZuV6lq3ezKA7bWacddw0/m7BkTUfm5f6h7zEIZIH4yYFM9sbOA3oGX68uy8e632e3IK8mG62p19xlU8Ln1m5nhsf/s/S9qB7abv8j32WY/NS/5CXOETyopqO5tuAU4BdwO+GfY3LzNrM7DHgGeBed19d4bDTzGydma0ws2lVxi0Nsmz15qr3Zzk2L/UPeYlDJC+qeXzU7e7vncjJ3X0QONrMOoHvmNkMd//ZsEPuAJa5+8tm9lfADcCJ5ecxs/OA8wAOOuigiYQiEzQ4Sp9Tpf1Zjs1L/UNe4hDJi2ruFB40s8oPhavk7tuAVcB7y/ZvdfeX082lwKxR3n+tu/e6e29XV1ctoUhGbWZV789ybF7qH/ISh0hejJoUzGy9ma0D3gE8amab0sc8Q/vHZGZd6R0CZtYBvAf4RdkxBw7bnA9snEgjJJyzjqv8RK/S/izH5qX+IS9xiOTFWI+P5tV47gOBG8ysjST5LHf3O81sMdDn7rcDF5jZfJL+iueARTX+TqmzoQ7iakYUZTk2L/UPeYlDJC/GrVMws2+5+4fG29coqlMQEcmunnUKR5SduI1Rnv3L+EKNic9SHxDy3FnaF+O1iM665XDfYtjeD/t2w0mXwlFnNDsqybFRk4KZXQJ8Gugws+eHdgOvANc2ILbCCTUmPkt9QMhzZ2lfjNciOuuWwx0XwM50JNX2zck2KDHIqEbtaHb3y919MrDE3fdJvya7+/7ufkkDYyyMUGPis9QHhDx3lvbFeC2ic9/i3QlhyM4dyX6RUYx1p3BM+uO3h/1c4u6PBouqoEKNic9SHxDy3FnaF+O1iM72/mz7RRi7T+Ef0u+TgF7gpySPj44CVpMMVZUMpnR2MFDhj16tY+LbzCr+0RutbiDUubO0L8ZrEZ19u5NHRpX2i4xirMdH73b3dwO/AY5Ji8dmATOBxxsVYJGEGhOfpT4g5LmztC/GaxGdky6F9rIk296R7BcZRTWjj/67u68f2nD3n5nZ0QFjKqxQY+Kz1AeEPHeW9sV4LaIz1Jms0UeSQTV1CstIJsC7kWSW0w8Cr3f3s8KHtyfVKYiIZFfPOoUPAx8BLky3fwhcU0NsEpk81B5I5FQvEY1xk4K7vwR8Nf2SFpOH2gOJnOolojLWhHjL0+/r04nwRnw1LkRppjzUHkjkVC8RlbHuFIYeF9U6MZ5ELA+1BxI51UtEZawhqU+nP54EvNbdfzP8qzHhSbNlWW9AaxNIRaPVRaheIpeqWWSnB/hnM3vCzJab2cc0JLV15KH2QCKneomoVNPRfCmUFso5F7gYuBJoG+t9Ugx5qD2QyKleIirV1Cl8BpgNvB5YC/wY+NGwx0sNpToFEZHs6lmncCrJymjfBX4APJwOUy20UOPts5w3L+sCqPYgZ4o+5r/o7cuiCdeimsdHx5jZZJIJ8P4EWGpm/+XuhZ0QL9R4+yznzcu6AKo9yJmij/kvevuyaNK1GLej2cxmkExt8ZfAmUA/cH+wiHIg1Hj7LOfNy7oAqj3ImaKP+S96+7Jo0rWo5vHRF0keG10F/MTddwaNKAdCjbfPct68rAug2oOcKfqY/6K3L4smXYtx7xTc/WR3/5K7P9gKCQHCjbfPct7R5v9v9LoAqj3ImaKP+S96+7Jo0rWopk6h5YQab5/lvHlZF0C1BzlT9DH/RW9fFk26FtU8Pmo5ocbbZzlvXtYFUO1BzhR9zH/R25dFk67FuHUKeaM6BRGR7GquUzCzO0gW1anI3edPMLaWlof6hw8sfYgHnniutD37kP246dzja45BpFDuvAjWfAN8EKwNZi2CeV+p/bw5r8MY6/HRlxsWRYvIQ/1DeUIAeOCJ5/jA0oeUGESG3HkR9F2/e9sHd2/XkhgiqMMYa5bUH4z11cggiyIP9Q/lCWG8/SItac03su2vVgR1GON2NJvZYcDlwOHApKH97v7mgHEVUh7qH0SkCj6YbX+1IqjDqGZI6r+QrMm8C3g38E3gWyGDKqo81D+ISBVslEmgR9tfrQjqMKpJCh3ufh/JSKXfuPtlwIlhwyqmPNQ/zD5kv4rnGG2/SEuatSjb/mpFUIdRTVJ4ycxeA/zKzD5qZn8OvCFwXIW0YOZULj/1SKZ2dmDA1M4OLj/1yLrUP1R73pvOPX6PBKDRRyJl5n0Fes/ZfWdgbcl2raOPjjoD/uwq2HcaYMn3P7sqN53MUN16Cm8FNgKdwOeBfYEvufvD4cPbk+oURESyq9t6Cu7+k/SErwEucPcXqgxgEvBDYO/096xw98+WHbM3SR/FLGArcKa7P1nN+bPKWh8Q2xoCWdZeKPq1CDoOPMvY9VBxhGxfzsfQ1yRr24p8LcZQzeijXpLO5snp9nbgbHdfM85bXwZOdPcXzawd+LGZ3VV2h3EO8Ft3P9TMFpLMyHrmRBoylqz1AbGtIZBl7YWiX4ug48CzjF0PFUfI9kUwhn7CsratyNdiHNX0KXwd+N/u3uPuPcD5JEliTJ54Md1sT7/Kn1WdAtyQ/rwCOMms/tOAZq0PiG0NgSxrLxT9WgQdB55l7HqoOEK2L4Ix9BOWtW1FvhbjqCYpvODuPxracPcfA9U+Qmozs8eAZ4B73X112SFTgc3peXcB24H9K5znPDPrM7O+LVu2VPOrR8g6jj+2cf9Z1l4o+rUIOg48y9j1UHGEbF8EY+gnLGvbinwtxlFNUnjEzP7ZzN5lZu80s/8DrDKzY8zsmLHe6O6D7n400A0cm67iNlylu4I9/pK5+7Xu3uvuvV1dXVWEPFLWcfyxjfvPsvZC0a9F0HHgWcauh4ojZPsiGEM/YVnbVuRrMY5qksLRwB8BnwUuA94CvB34B6qcH8ndtwGrgPeWvdQPTAMws71IRjbVfb6FrPUBsa0hkGXthaJfi6DjwLOMXQ8VR8j2RTCGfsKytq3I12Ic1Yw+evdETmxmXcBOd99mZh3Ae0g6koe7nWTt54eA04H7PcBc3lnXBIhtDYEsay8U/VoEnYN+qDO5mtFHoeII2b4ir2WQtW1FvhbjqKZO4Q+BLwBT3P19ZnY4cLy7Xz/O+44i6URuI7kjWe7ui81sMdDn7renw1a/BcwkuUNY6O7/MdZ5VacgIpJd3eoUgG+QjDb6m3T7l8AtwJhJwd3XkfyxL99/6bCfXwLeX0UMIiLSANX0KRzg7suBV6E0SqjGqQLzb+XaAWZfcT8Hf+q7zL7iflauHWh2SJIH65bDV2fAZZ3J93XL63NsKFljyEP7YjtvwVRzp/A7M9ufdFSQmb2NZOhoYUVXsCWNkaWgKQ/FTyELtmIrzsvD5xGJau4ULiLpED7EzB4gmZbiY0GjarLoCrakMbIUNOWh+ClkwVZsxXl5+DwiUc3oo0fN7J3AdJK6gk3uvjN4ZE0UXcGWNEaWgqY8FD+FLNiKrTgvD59HJMa9UzCz95OsqfBzYAFwy3hFa7GLrmBLGiNLQVMeip9CFmzFVpyXh88jEtU8Pvpbd3/BzN4BzCEZZnpN2LCaK7qCLWmMLAVNeSh+ClmwFVtxXh4+j0hUkxSGHq6fDFzj7rcBrw0XUvOFWgxHIpdlgZQ8LKaSNYY8tC+28xZQNcVrdwIDJBXJs4AdwCPu/sfhw9uTitdERLKrZ/HaGSRzFn05nbLiQODiWgMUKbwsC/LkRWwx52UhnLzEUQfVjD76PXDrsO2ngadDBiUSvSwL8uRFbDHnpfYgL3HUSTV9CiKSVZYFefIitpjzUnuQlzjqRElBJIQsC/LkRWwx56X2IC9x1ImSgkgIWRbkyYvYYs5L7UFe4qgTJQWRELIsyJMXscWcl9qDvMRRJ0oKIiHM+wr0nrP7X9nWlmznscN2SGwx56X2IC9x1Mm4dQp5ozoFEZHs6lmnIBJGjGO7Q8Ucqj4gxmssTaWkIM0R49juUDGHqg+I8RpL06lPQZojxrHdoWIOVR8Q4zWWplNSkOaIcWx3qJhD1QfEeI2l6ZQUpDliHNsdKuZQ9QExXmNpOiUFaY4Yx3aHijlUfUCM11iaTklBmiPGsd2hYg5VHxDjNZamU52CiEgLqLZOQXcKIuuWw1dnwGWdyfd1yxt/3lAxiGSkOgVpbaHG8mc5r+oJJEd0pyCtLdRY/iznVT2B5IiSgrS2UGP5s5xX9QSSI0oK0tpCjeXPcl7VE0iOKClIaws1lj/LeVVPIDmipCCtLdRY/iznVT2B5EiwOgUzmwZ8E3gj8Cpwrbv/Y9kx7wJuA36d7rrV3cfsXVOdgohIdnlYT2EX8Nfu/qiZTQbWmNm97r6h7Lgfufu8gHFII8U4f3+WmGNsXx7oukUjWFJw96eBp9OfXzCzjcBUoDwpSFHEON5e9QTh6bpFpSF9CmbWA8wEVld4+Xgz+6mZ3WVmRzQiHgkkxvH2qicIT9ctKsErms3s9cC/Ah939+fLXn4UeJO7v2hmc4GVwGEVznEecB7AQQcdFDhimbAYx9urniA8XbeoBL1TMLN2koRwk7vfWv66uz/v7i+mP38PaDezAyocd62797p7b1dXV8iQpRYxjrdXPUF4um5RCZYUzMyA64GN7l5xDmAze2N6HGZ2bBrP1lAxSWAxjrdXPUF4um5RCfn4aDbwIWC9mT2W7vs0cBCAu38NOB34iJntAnYACz22ubxlt6FOw5hGmWSJOcb25YGuW1S0noKISAvIQ52C5JXGjI9050Ww5hvgg8mqZ7MW1b7qmUiklBRajcaMj3TnRdB3/e5tH9y9rcQgLUhzH7UajRkfac03su0XKTglhVajMeMj+WC2/SIFp6TQajRmfCRry7ZfpOCUFFqNxoyPNGtRtv0iBaek0Go0d/9I874CvefsvjOwtmRbnczSolSnICLSAlSn0EAr1w6w5J5NPLVtB1M6O7h4znQWzJza7LDqp+h1DUVvXx7oGkdDSaFGK9cOcMmt69mxMxmtMrBtB5fcuh6gGImh6HUNRW9fHugaR0V9CjVacs+mUkIYsmPnIEvu2dSkiOqs6HUNRW9fHugaR0VJoUZPbduRaX90il7XUPT25YGucVSUFGo0pbMj0/7oFL2uoejtywNd46goKdTo4jnT6WgfWejU0d7GxXOmNymiOit6XUPR25cHusZRUUdzjYY6kws7+qjoc+EXvX15oGscFdUpiIi0gGrrFPT4SKTI1i2Hr86AyzqT7+uWx3FuaRo9PhIpqpD1Aao9KCzdKYgUVcj6ANUeFJaSgkhRhawPUO1BYSkpiBRVyPoA1R4UlpKCSFGFrA9Q7UFhKSmIFFXItTO0LkdhqU5BRKQFqE5BREQyU1IQEZESJQURESlRUhARkRIlBRERKVFSEBGREiUFEREpUVIQEZGSYEnBzKaZ2b+b2UYz+7mZXVjhGDOzq8zscTNbZ2bHhIpHaqB580VaRsj1FHYBf+3uj5rZZGCNmd3r7huGHfM+4LD06zjgmvS75IXmzRdpKcHuFNz9aXd/NP35BWAjUL5w8SnANz3xMNBpZgeGikkmQPPmi7SUhvQpmFkPMBNYXfbSVGDzsO1+9kwcmNl5ZtZnZn1btmwJFaZUonnzRVpK8KRgZq8H/hX4uLs/X/5yhbfsMUOfu1/r7r3u3tvV1RUiTBmN5s0XaSlBk4KZtZMkhJvc/dYKh/QD04ZtdwNPhYxJMtK8+SItJeToIwOuBza6+1dGOex24C/SUUhvA7a7+9OhYpIJ0Lz5Ii0l5Oij2cCHgPVm9li679PAQQDu/jXge8Bc4HHg98CHA8YjE3XUGUoCIi0iWFJw9x9Tuc9g+DEOnB8qBhERyUYVzSIiUqKkICIiJUoKIiJSoqQgIiIlSgoiIlKipCAiIiVKCiIiUmJJqUA8zGwL8JtmxzGKA4Bnmx1EQGpfvIrcNlD7qvEmdx938rjokkKemVmfu/c2O45Q1L54FbltoPbVkx4fiYhIiZKCiIiUKCnU17XNDiAwtS9eRW4bqH11oz4FEREp0Z2CiIiUKClMgJm1mdlaM7uzwmuLzGyLmT2Wfv3PZsRYCzN70szWp/H3VXjdzOwqM3vczNaZ2THNiHMiqmjbu8xs+7DPL6ol5sys08xWmNkvzGyjmR1f9nq0nx1U1b5oPz8zmz4s7sfM7Hkz+3jZMcE/v5CL7BTZhcBGYJ9RXr/F3T/awHhCeLe7jzYu+n3AYenXccA16fdYjNU2gB+5+7yGRVNf/wjc7e6nm9lrgT8oez32z2689kGkn5+7bwKOhuQfnsAA8J2yw4J/frpTyMjMuoGTgeuaHUsTnQJ80xMPA51mdmCzg2p1ZrYPcALJMri4+yvuvq3ssGg/uyrbVxQnAU+4e3mhbvDPT0khuyuBTwKvjnHMaemt3Qozm9aguOrJgX8zszVmdl6F16cCm4dt96f7YjBe2wCON7OfmtldZnZEI4Or0ZuBLcC/pI83rzOz15UdE/NnV037IN7Pb7iFwLIK+4N/fkoKGZjZPOAZd18zxmF3AD3ufhTwfeCGhgRXX7Pd/RiSW9XzzeyEstcrLbMayzC28dr2KMl0AH8M/BOwstEB1mAv4BjgGnefCfwO+FTZMTF/dtW0L+bPD4D0sdh84NuVXq6wr66fn5JCNrOB+Wb2JHAzcKKZ3Tj8AHff6u4vp5tLgVmNDbF27v5U+v0Zkmeax5Yd0g8MvwPqBp5qTHS1Ga9t7v68u7+Y/vw9oN3MDmh4oBPTD/S7++p0ewXJH9HyY6L87KiifZF/fkPeBzzq7v9V4bXgn5+SQgbufom7d7t7D8nt3f3u/sHhx5Q935tP0iEdDTN7nZlNHvoZ+FPgZ2WH3Q78RToS4m3Adnd/usGhZlZN28zsjWZm6c/Hkvw/srXRsU6Eu/8/YLOZTU93nQRsKDssys8OqmtfzJ/fMGdR+dERNODz0+ijOjCzxUCfu98OXGBm84FdwHPAombGNgF/CHwn/f9qL+D/uvvdZvZXAO7+NeB7wFzgceD3wIebFGtW1bTtdOAjZrYL2AEs9LgqPD8G3JQ+gvgP4MMF+eyGjNe+qD8/M/sD4E+A/zVsX0M/P1U0i4hIiR4fiYhIiZKCiIiUKCmIiEiJkoKIiJQoKYiISImSgkhG6UyclWbIrbi/Dr9vgZkdPmx7lZkVdj1iaS4lBZH8WwAcPu5RInWgpCCFk1YufzedFO1nZnZmun+Wmf0gnQzvnqHq8/Rf3lea2YPp8cem+49N961Nv08f6/dWiOHrZvaT9P2npPsXmdmtZna3mf3KzL407D3nmNkv03iWmtnVZvZ2ksr4JZbMsX9Ievj7zeyR9Pj/UadLJ6KKZimk9wJPufvJAGa2r5m1k0yQdoq7b0kTxd8DZ6fveZ27vz2dIO/rwAzgF8AJ7r7LzN4DfAE4rcoY/oZkGpSzzawTeMTMvp++djQwE3gZ2GRm/wQMAn9LMpfPC8D9wE/d/UEzux24091XpO0B2MvdjzWzucBngfdM5EKJlFNSkCJaD3zZzL5I8sf0R2Y2g+QP/b3pH9U2YPicMcsA3P2HZrZP+od8MnCDmR1GMhNle4YY/pRk8sRPpNuTgIPSn+9z9+0AZrYBeBNwAPADd38u3f9t4I/GOP+t6fc1QE+GuETGpKQghePuvzSzWSRzxFxuZv9GMiPqz939+NHeVmH788C/u/ufm1kPsCpDGAaclq6mtXun2XEkdwhDBkn+P6w0JfJYhs4x9H6RulCfghSOmU0Bfu/uNwJfJnkkswnosnRNXzNrt5ELsAz1O7yDZObJ7cC+JEsiQvaJDe8BPjZsxs6Z4xz/CPBOM/tvZrYXIx9TvUBy1yISnP6FIUV0JEnH7KvATuAj7v6KmZ0OXGVm+5L8t38l8PP0Pb81swdJ1t0e6mf4Esnjo4tInvFn8fn0/OvSxPAkMOq6we4+YGZfAFaTzI+/AdievnwzsNQix/0gAAAAaElEQVTMLiCZBVQkGM2SKi3PzFYBn3D3vibH8Xp3fzG9U/gO8HV3L1+4XSQoPT4SyY/LzOwxkoV/fk2ES0lK/HSnICIiJbpTEBGREiUFEREpUVIQEZESJQURESlRUhARkRIlBRERKfn/3o+Od5b5bIEAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.scatter(df[:50]['sepal length'], df[:50]['sepal width'], label='0')\n",
    "plt.scatter(df[50:100]['sepal length'], df[50:100]['sepal width'], label='1')\n",
    "plt.xlabel('sepal length')\n",
    "plt.ylabel('sepal width')\n",
    "plt.legend()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = np.array(df.iloc[:100, [0, 1, -1]])\n",
    "X, y = data[:,:-1], data[:,-1]\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "class KNN:\n",
    "    def __init__(self, X_train, y_train, n_neighbors=3, p=2):\n",
    "        \"\"\"\n",
    "        parameter: n_neighbors 临近点个数\n",
    "        parameter: p 距离度量\n",
    "        \"\"\"\n",
    "        self.n = n_neighbors\n",
    "        self.p = p\n",
    "        self.X_train = X_train\n",
    "        self.y_train = y_train\n",
    "    \n",
    "    def predict(self, X):\n",
    "        # 取出n个点\n",
    "        knn_list = []\n",
    "        for i in range(self.n):\n",
    "            dist = np.linalg.norm(X - self.X_train[i], ord=self.p)\n",
    "            knn_list.append((dist, self.y_train[i]))\n",
    "            \n",
    "        for i in range(self.n, len(self.X_train)):\n",
    "            max_index = knn_list.index(max(knn_list, key=lambda x: x[0]))\n",
    "            dist = np.linalg.norm(X - self.X_train[i], ord=self.p)\n",
    "            if knn_list[max_index][0] > dist:\n",
    "                knn_list[max_index] = (dist, self.y_train[i])\n",
    "                \n",
    "        # 统计\n",
    "        knn = [k[-1] for k in knn_list]\n",
    "        count_pairs = Counter(knn)\n",
    "        max_count = sorted(count_pairs, key=lambda x:x)[-1]\n",
    "        return max_count\n",
    "    \n",
    "    def score(self, X_test, y_test):\n",
    "        right_count = 0\n",
    "        n = 10\n",
    "        for X, y in zip(X_test, y_test):\n",
    "            label = self.predict(X)\n",
    "            if label == y:\n",
    "                right_count += 1\n",
    "        return right_count / len(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "clf = KNN(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.0"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "clf.score(X_test, y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Test Point: 1.0\n"
     ]
    }
   ],
   "source": [
    "test_point = [6.0, 3.0]\n",
    "print('Test Point: {}'.format(clf.predict(test_point)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.legend.Legend at 0x1c32007e978>"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEKCAYAAAD9xUlFAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3XucVXW9//HXx3ESUIREOgkDjZfipwJxGW+hmZcOXkgN72lF8ouf/iwrk5LseKFjanjSYz7Sg2mZIkocwkt5yQtpmdgAI6ho4kllBn9HQkER9AB+fn+sNZthu2dmr9l77b3W2u/n4zGPmfXda3/ns9bW+bDW+n6+X3N3REREALardgAiIpIcSgoiIpKjpCAiIjlKCiIikqOkICIiOUoKIiKSo6QgIiI5SgoiIpKjpCAiIjnbx/0LzKwOaAba3H1C3muTgBlAW9h0vbv/oqv+dt11V29sbIwhUhGR7Fq0aNE/3H1gd/vFnhSAbwHLgZ07ef0ud/9GsZ01NjbS3NxclsBERGqFmb1azH6x3j4yswbgWKDLf/2LiEgyxP1M4Vrge8AHXexzopktNbO5Zjak0A5mNsXMms2sefXq1bEEKiIiMSYFM5sAvOHui7rY7V6g0d1HAg8Dtxbayd1nunuTuzcNHNjtLTEREemhOJ8pjAOOM7NjgF7AzmZ2u7uf2b6Du6/psP9NwFUxxiMiCbRp0yZaW1t57733qh1KJvTq1YuGhgbq6+t79P7YkoK7TwOmAZjZ54ALOiaEsH03d3893DyO4IG0iNSQ1tZW+vbtS2NjI2ZW7XBSzd1Zs2YNra2t7L777j3qo+J1CmY23cyOCzfPM7PnzOwZ4DxgUqXjEZHqeu+99xgwYIASQhmYGQMGDCjpqqsSQ1Jx9wXAgvDnizu0564mRLJm/pI2Zjz4IqvWbmRQ/95MHT+ME0YPrnZYiaSEUD6lnsuKJAWRWjN/SRvT5i1j46YtALSt3ci0ecsAlBgk0TTNhUgMZjz4Yi4htNu4aQszHnyxShGJFEdJQSQGq9ZujNQuxZs1CxobYbvtgu+zZpXW39q1a/n5z3/eo/dee+21bNiwobQA8lx88cU8/PDDXe6zYMECnnzyybL+3nZKCiIxGNS/d6R2Kc6sWTBlCrz6KrgH36dMKS0xJC0pTJ8+nSOPPLLLfZQURFJm6vhh9K6v26atd30dU8cPq1JE2XDRRZD/N3jDhqC9py688EJefvllRo0axdSpU5kxYwb77bcfI0eO5JJLLgHg3Xff5dhjj+XTn/40w4cP56677uK6665j1apVHHbYYRx22GGd9r/TTjvx3e9+lzFjxnDEEUfQPitDS0sLBx54ICNHjuSLX/wib731FgCTJk1i7ty5QDDX2yWXXMKYMWMYMWIEL7zwAq+88go33ngj11xzDaNGjeKJJ57o+cEXoKQgEoMTRg/miokjGNy/NwYM7t+bKyaO0EPmEr32WrT2Ylx55ZXsueeetLS08PnPf56XXnqJp59+mpaWFhYtWsTjjz/OAw88wKBBg3jmmWd49tlnOeqoozjvvPMYNGgQjz32GI899lin/b/77ruMGTOGxYsXc+ihh3LZZZcB8JWvfIWrrrqKpUuXMmLEiFx7vl133ZXFixdzzjnncPXVV9PY2MjZZ5/Nd77zHVpaWjjkkEN6fvAFaPSRSExOGD1YSaDMhg4NbhkVai+Hhx56iIceeojRo0cDsH79el566SUOOeQQLrjgAr7//e8zYcKESH+It9tuO0499VQAzjzzTCZOnMi6detYu3Ythx56KABf/epXOfnkkwu+f+LEiQCMHTuWefPmlXJ4xcUb+28QESmTyy+HPn22bevTJ2gvB3dn2rRptLS00NLSwooVK5g8eTKf+tSnWLRoESNGjGDatGlMnz69x78jah3BDjvsAEBdXR2bN2/u8e8tlpKCiKTGGWfAzJnwiU+AWfB95sygvaf69u3LO++8A8D48eO55ZZbWL9+PQBtbW288cYbrFq1ij59+nDmmWdywQUXsHjx4g+9tzMffPBB7hnBHXfcwcEHH0y/fv346Ec/mnsecNttt+WuGqLGXG66fSQiqXLGGaUlgXwDBgxg3LhxDB8+nKOPPpovfelLHHTQQUDwkPj2229nxYoVTJ06le222476+npuuOEGAKZMmcLRRx/Nbrvt1ulzhR133JHnnnuOsWPH0q9fP+666y4Abr31Vs4++2w2bNjAHnvswS9/+cuiY/7CF77ASSedxN13383Pfvazsj5XMHcvW2eV0NTU5Fp5TSQ7li9fzt57713tMGKz00475a48KqXQOTWzRe7e1N17dftIRERydPtIRKQMDjjgAN5///1t2m677baKXyWUSklBRKQMFi5cWO0QykK3j0REJEdJQUREcnT7SGqeFsMR2UpXClLT2hfDaVu7EWfrYjjzl7RVOzSpoAceeIBhw4ax1157ceWVV1Y7nKpSUpCapsVwZMuWLZx77rncf//9PP/888yePZvnn3++2mFVjW4fSU3TYjjpU+7bfU8//TR77bUXe+yxBwCnnXYad999N/vss0+5Qk4VXSlITdNiOOkSx+2+trY2hgwZkttuaGigra12bx8qKUhN02I46RLH7b5CU/1Enck0S3T7SGpa+20HjT5Khzhu9zU0NLBy5crcdmtrK4MGDepxf2mnpCA1T4vhpMeg/r1pK5AASrndt99++/HSSy/x97//ncGDB3PnnXdyxx13lBJmqun2kVTN/CVtjLvyUXa/8HeMu/JRDQOVbsVxu2/77bfn+uuvZ/z48ey9996ccsop7LvvvqWGmlq6UpCqaH9g2H5/uP2BIaB/tUun4rrdd8wxx3DMMceUI8TUU1KQqujqgaGSgnRFt/vipdtHUhWqDxBJJiUFqQrVB4gkk5KCVIXqA0SSSc8UpCpUHyCSTLEnBTOrA5qBNnefkPfaDsCvgbHAGuBUd38l7pgkGfTAUCR5KnH76FvA8k5emwy85e57AdcAV1UgHpHEUc1GdZ111ll87GMfY/jw4dUOpepiTQpm1gAcC/yik12OB24Nf54LHGG1POmI1CSt6VB9kyZN4oEHHqh2GIkQ95XCtcD3gA86eX0wsBLA3TcD64ABMcckkiha0yGipXPgmuFwaf/g+9I5JXf52c9+ll122aUMwaVfbEnBzCYAb7j7oq52K9D2oSkLzWyKmTWbWfPq1avLFqNIEqhmI4Klc+De82DdSsCD7/eeV5bEIIE4rxTGAceZ2SvAncDhZnZ73j6twBAAM9se6Ae8md+Ru8909yZ3bxo4cGCMIYtUnmo2InhkOmzKS5abNgbtUhaxJQV3n+buDe7eCJwGPOruZ+btdg/w1fDnk8J9Pjy5uUiGqWYjgnWt0dolsorXKZjZdKDZ3e8BbgZuM7MVBFcIp1U6HpFqU81GBP0awltHBdqlLCqSFNx9AbAg/PniDu3vASdXIgaRJFPNRpGOuDh4htDxFlJ976C9BKeffjoLFizgH//4Bw0NDVx22WVMnjy5xGDTSRXNkjk/nL+M2QtXssWdOjNOP2AI/3rCiGqHJeUw8pTg+yPTg1tG/RqChNDe3kOzZ88uQ3DZoKQgmfLD+cu4/anXcttb3HPbSgwZMfKUkpOAdE4T4kmmzF5Y4H5zF+0isi0lBcmULZ0MXuusXZJBgw7Lp9RzqaQgmVLXySwpnbVL9fXq1Ys1a9YoMZSBu7NmzRp69erV4z70TEEy5fQDhmzzTKFjuyRTQ0MDra2taLaC8ujVqxcNDT0foqukIJnS/jBZo4/So76+nt13373aYUjI0nbJ1tTU5M3NzdUOQ0QkVcxskbs3dbefrhSkrM646S/8+eWt01eN23MXZn39oCpGVD3zl7SpSllSRw+apWzyEwLAn19+kzNu+kuVIqoerZEgaaWkIGWTnxC6a88yrZEgaaWkIBIDrZEgaaWkIBIDrZEgaaWkIGUzbs/Cyxl21p5lWiNB0kpJQcpm1tcP+lACqNXRRyeMHswVE0cwuH9vDBjcvzdXTByh0UeSeKpTEBGpAapTkKqIa2x+lH5VHyDSc0oKUjbtY/Pbh2K2j80HSvqjHKXfuGIQqRV6piBlE9fY/Cj9qj5ApDRKClI2cY3Nj9Kv6gNESqOkIGUT19j8KP2qPkCkNEoKUjZxjc2P0q/qA0RKowfNUjbtD3LLPfInSr9xxSBSK1SnICJSA1SnkFBpHEOfxphFpGeUFCoojWPo0xiziPScHjRXUBrH0KcxZhHpOSWFCkrjGPo0xiwiPaekUEFpHEOfxphFpOeUFCoojWPo0xiziPScHjRXUBrH0KcxZhHpudjqFMysF/A4sANB8pnr7pfk7TMJmAG0hU3Xu/svuupXdQoiItEloU7hfeBwd19vZvXAn8zsfnd/Km+/u9z9GzHGISX64fxlzF64ki3u1Jlx+gFD+NcTRpS8b1LqH5ISh0gSdJsUzGwH4ESgseP+7j69q/d5cAmyPtysD7/SVT4t/HD+Mm5/6rXc9hb33Hb+H/so+yal/iEpcYgkRTEPmu8Gjgc2A+92+OqWmdWZWQvwBvAHd19YYLcTzWypmc01syFFxi0VMnvhyqLbo+yblPqHpMQhkhTF3D5qcPejetK5u28BRplZf+C3Zjbc3Z/tsMu9wGx3f9/MzgZuBQ7P78fMpgBTAIYOHdqTUKSHtnTyzKlQe5R9k1L/kJQ4RJKimCuFJ82s8E3hIrn7WmABcFRe+xp3fz/cvAkY28n7Z7p7k7s3DRw4sJRQJKI6s6Lbo+yblPqHpMQhkhSdJgUzW2ZmS4GDgcVm9mJ4m6e9vUtmNjC8QsDMegNHAi/k7bNbh83jgOU9OQiJz+kHFL6jV6g9yr5JqX9IShwiSdHV7aMJJfa9G3CrmdURJJ857n6fmU0Hmt39HuA8MzuO4HnFm8CkEn+nlFn7A+JiRhRF2Tcp9Q9JiUMkKbqtUzCz29z9y921VYrqFEREoitnncK+eR3X0cm9f+leXGPio9QHxNl3lONL47lInaVz4JHpsK4V+jXAERfDyFOqHZUkWKdJwcymAT8AepvZ2+3NwP8AMysQW+bENSY+Sn1AnH1HOb40novUWToH7j0PNoUjqdatDLZBiUE61emDZne/wt37AjPcfefwq6+7D3D3aRWMMTPiGhMfpT4gzr6jHF8az0XqPDJ9a0Jot2lj0C7Sia6uFMaEP/6mw8857r44tqgyKq4x8VHqA+LsO8rxpfFcpM661mjtInT9TOHfwu+9gCbgGYLbRyOBhQRDVSWCQf1701bgj16pY+LrzAr+0eusbiCuvqMcXxrPRer0awhuGRVqF+lEV7ePDnP3w4BXgTFh8dhYYDSwolIBZklcY+Kj1AfE2XeU40vjuUidIy6G+rwkW987aBfpRDGjj/6Xuy9r33D3Z81sVIwxZVZcY+Kj1AfE2XeU40vjuUid9ofJGn0kERRTpzCbYAK82wlmOT0T2MndT48/vA9TnYKISHTlrFP4GnAO8K1w+3HghhJik5RJQu2BpJzqJVKj26Tg7u8B14RfUmOSUHsgKad6iVTpakK8OeH3ZeFEeNt8VS5EqaYk1B5IyqleIlW6ulJov11U6sR4kmJJqD2QlFO9RKp0NST19fDHI4CPuPurHb8qE55UW5T1BrQ2gRTUWV2E6iUSqZhFdhqB/zCzl81sjpl9U0NSa0cSag8k5VQvkSrFPGi+GHIL5XwdmApcC9R19T7JhiTUHkjKqV4iVYqpU/ghMA7YCVgC/Al4osPtpYpSnYKISHTlrFOYSLAy2u+APwJPhcNUMy2u8fZR+k3KugCqPUiYrI/5z/rxRVGFc1HM7aMxZtaXYAK8zwM3mdl/u3tmJ8SLa7x9lH6Tsi6Aag8SJutj/rN+fFFU6Vx0+6DZzIYTTG3xVeBUoBV4NLaIEiCu8fZR+k3KugCqPUiYrI/5z/rxRVGlc1HM7aOrCG4bXQf81d03xRpRAsQ13j5Kv0lZF0C1BwmT9TH/WT++KKp0Lrq9UnD3Y939J+7+ZC0kBIhvvH2Ufjub/7/S6wKo9iBhsj7mP+vHF0WVzkUxdQo1J67x9lH6Tcq6AKo9SJisj/nP+vFFUaVzUczto5oT13j7KP0mZV0A1R4kTNbH/Gf9+KKo0rnotk4haVSnICISXcl1CmZ2L8GiOgW5+3E9jK2mJaH+4Yyb/sKfX34ztz1uz12Y9fWDSo5BJFPuOx8W/Qp8C1gdjJ0EE35aer8Jr8Po6vbR1RWLokYkof4hPyEA/PnlNznjpr8oMYi0u+98aL5567Zv2bpdSmJIQR1GV7Ok/rGrr0oGmRVJqH/ITwjdtYvUpEW/itZerBTUYXT7oNnMPglcAewD9Gpvd/c9Yowrk5JQ/yAiRfAt0dqLlYI6jGKGpP6SYE3mzcBhwK+B2+IMKquSUP8gIkWwTiaB7qy9WCmowygmKfR290cIRiq96u6XAofHG1Y2JaH+YdyeuxTso7N2kZo0dlK09mKloA6jmKTwnpltB7xkZt8wsy8CH4s5rkw6YfRgrpg4gsH9e2PA4P69uWLiiLLUPxTb76yvH/ShBKDRRyJ5JvwUmiZvvTKwumC71NFHI0+BL1wH/YYAFnz/wnWJecgMxa2nsB+wHOgP/AjoB/zE3Z+KP7wPU52CiEh0ZVtPwd3/Gna4HXCeu79TZAC9gMeBHcLfM9fdL8nbZweCZxRjgTXAqe7+SjH9RxW1PiBtawhEWXsh6+ci1nHgUcauxxVHhH5nzYKLLoLXXoOhQ+Hyy+GMM8rTd+pEPbYsn4suFDP6qIngYXPfcHsdcJa7L+rmre8Dh7v7ejOrB/5kZvfnXWFMBt5y973M7DSCGVlP7cmBdCVqfUDa1hCIsvZC1s9FrOPAo4xdjyuOCP3OmgVTpsCGDcH2q68G29BJYkjBGPoei3psWT4X3SjmmcItwP9190Z3bwTOJUgSXfLA+nCzPvzKv1d1PHBr+PNc4Aiz8k8DGrU+IG1rCERZeyHr5yLWceBRxq7HFUeEfi+6aGtCaLdhQ9Beat+pE/XYsnwuulFMUnjH3Z9o33D3PwHF3kKqM7MW4A3gD+6+MG+XwcDKsN/NwDpgQIF+pphZs5k1r169uphfvY2o4/jTNu4/ytoLWT8XsY4DjzJ2Pa44IvT72msF9uuiPQ1j6Hss6rFl+Vx0o5ik8LSZ/YeZfc7MDjWznwMLzGyMmY3p6o3uvsXdRwENwP7hKm4dFboq+NBfMnef6e5N7t40cODAIkLeVtRx/Gkb9x9l7YWsn4tYx4FHGbseVxwR+h06tPCunbWnYQx9j0U9tiyfi24UkxRGAZ8CLgEuBfYGPgP8G0XOj+Tua4EFwFF5L7UCQwDMbHuCkU1ln28han1A2tYQiLL2QtbPRazjwKOMXY8rjgj9Xn459OmzbVufPkF7qX2nTtRjy/K56EYxo48O60nHZjYQ2OTua82sN3AkwYPkju4hWPv5L8BJwKMew1zeUdcESNsaAlHWXsj6uYh1Dvr2h8nFjD6KK44I/bY/TC569FGW1zKIemxZPhfdKKZO4Z+AHwOD3P1oM9sHOMjdb+7mfSMJHiLXEVyRzHH36WY2HWh293vCYau3AaMJrhBOc/f/6qpf1SmIiERXtjoF4FcEo43axyz8DbgL6DIpuPtSgj/2+e0Xd/j5PeDkImIQEZEKKOaZwq7uPgf4AHKjhEqcKjD55i9pY9yVj7L7hb9j3JWPMn9JW7VDkiRYOgeuGQ6X9g++L51Tnn3jEjWGJBxf2vrNmGKuFN41swGEo4LM7ECCoaOZlbqCLamMKAVNSSh+irNgKwHFeYnoN4OKuVI4n+CB8J5m9meCaSm+GWtUVZa6gi2pjCgFTUkofoqzYCsBxXmJ6DeDihl9tNjMDgWGEdQVvOjum2KPrIpSV7AllRGloCkJxU9xFmwloDgvEf1mULdXCmZ2MsGaCs8BJwB3dVe0lnapK9iSyohS0JSE4qc4C7YSUJyXiH4zqJjbR//i7u+Y2cHAeIJhpjfEG1Z1pa5gSyojSkFTEoqf4izYSkBxXiL6zaBikkL7zfVjgRvc/W7gI/GFVH1xLYYjKRdlgZQkLKYSNYYkHF/a+s2gYorX7gPaCCqSxwIbgafd/dPxh/dhKl4TEYmunMVrpxDMWXR1OGXFbsDUUgMUybwoC/IkRdpiTspCOEmJowyKGX20AZjXYft14PU4gxJJvSgL8iRF2mJOSu1BUuIok2KeKYhIVFEW5EmKtMWclNqDpMRRJkoKInGIsiBPUqQt5qTUHiQljjJRUhCJQ5QFeZIibTEnpfYgKXGUiZKCSByiLMiTFGmLOSm1B0mJo0yUFETiMOGn0DR567+yrS7YTuID23ZpizkptQdJiaNMuq1TSBrVKYiIRFfOOgWReKRxbHdcMcdVH5DGcyxVpaQg1ZHGsd1xxRxXfUAaz7FUnZ4pSHWkcWx3XDHHVR+QxnMsVaekINWRxrHdccUcV31AGs+xVJ2SglRHGsd2xxVzXPUBaTzHUnVKClIdaRzbHVfMcdUHpPEcS9UpKUh1pHFsd1wxx1UfkMZzLFWnOgURkRpQbJ2CrhREls6Ba4bDpf2D70vnVL7fuGIQiUh1ClLb4hrLH6Vf1RNIguhKQWpbXGP5o/SregJJECUFqW1xjeWP0q/qCSRBlBSktsU1lj9Kv6onkARRUpDaFtdY/ij9qp5AEkRJQWpbXGP5o/SregJJkNjqFMxsCPBr4OPAB8BMd//3vH0+B9wN/D1smufuXT5dU52CiEh0SVhPYTPwXXdfbGZ9gUVm9gd3fz5vvyfcfUKMcUglpXH+/igxp/H4kkDnLTViSwru/jrwevjzO2a2HBgM5CcFyYo0jrdXPUH8dN5SpSLPFMysERgNLCzw8kFm9oyZ3W9m+1YiHolJGsfbq54gfjpvqRJ7RbOZ7QT8J/Btd3877+XFwCfcfb2ZHQPMBz5ZoI8pwBSAoUOHxhyx9Fgax9urniB+Om+pEuuVgpnVEySEWe4+L/91d3/b3deHP/8eqDezXQvsN9Pdm9y9aeDAgXGGLKVI43h71RPET+ctVWJLCmZmwM3AcncvOAewmX083A8z2z+MZ01cMUnM0jjeXvUE8dN5S5U4bx+NA74MLDOzlrDtB8BQAHe/ETgJOMfMNgMbgdM8bXN5y1btDw3TNMokSsxpPL4k0HlLFa2nICJSA5JQpyBJpTHj27rvfFj0K/AtwapnYyeVvuqZSEopKdQajRnf1n3nQ/PNW7d9y9ZtJQapQZr7qNZozPi2Fv0qWrtIxikp1BqNGd+Wb4nWLpJxSgq1RmPGt2V10dpFMk5JodZozPi2xk6K1i6ScUoKtUZz929rwk+hafLWKwOrC7b1kFlqlOoURERqgOoUKmj+kjZmPPgiq9ZuZFD/3kwdP4wTRg+udljlk/W6hqwfXxLoHKeGkkKJ5i9pY9q8ZWzcFIxWaVu7kWnzlgFkIzFkva4h68eXBDrHqaJnCiWa8eCLuYTQbuOmLcx48MUqRVRmWa9ryPrxJYHOcaooKZRo1dqNkdpTJ+t1DVk/viTQOU4VJYUSDerfO1J76mS9riHrx5cEOsepoqRQoqnjh9G7fttCp971dUwdP6xKEZVZ1usasn58SaBznCp60Fyi9ofJmR19lPW58LN+fEmgc5wqqlMQEakBxdYp6PaRSJYtnQPXDIdL+wffl85JR99SNbp9JJJVcdYHqPYgs3SlIJJVcdYHqPYgs5QURLIqzvoA1R5klpKCSFbFWR+g2oPMUlIQyao46wNUe5BZSgoiWRXn2hlalyOzVKcgIlIDVKcgIiKRKSmIiEiOkoKIiOQoKYiISI6SgoiI5CgpiIhIjpKCiIjkKCmIiEhObEnBzIaY2WNmttzMnjOzbxXYx8zsOjNbYWZLzWxMXPFICTRvvkjNiHM9hc3Ad919sZn1BRaZ2R/c/fkO+xwNfDL8OgC4IfwuSaF580VqSmxXCu7+ursvDn9+B1gO5C9cfDzwaw88BfQ3s93iikl6QPPmi9SUijxTMLNGYDSwMO+lwcDKDtutfDhxYGZTzKzZzJpXr14dV5hSiObNF6kpsScFM9sJ+E/g2+7+dv7LBd7yoRn63H2muze5e9PAgQPjCFM6o3nzRWpKrEnBzOoJEsIsd59XYJdWYEiH7QZgVZwxSUSaN1+kpsQ5+siAm4Hl7v7TTna7B/hKOArpQGCdu78eV0zSA5o3X6SmxDn6aBzwZWCZmbWEbT8AhgK4+43A74FjgBXABuBrMcYjPTXyFCUBkRoRW1Jw9z9R+JlBx30cODeuGEREJBpVNIuISI6SgoiI5CgpiIhIjpKCiIjkKCmIiEiOkoKIiOQoKYiISI4FpQLpYWargVerHUcndgX+Ue0gYqTjS68sHxvo+IrxCXfvdvK41CWFJDOzZndvqnYccdHxpVeWjw10fOWk20ciIpKjpCAiIjlKCuU1s9oBxEzHl15ZPjbQ8ZWNnimIiEiOrhRERCRHSaEHzKzOzJaY2X0FXptkZqvNrCX8+t/ViLEUZvaKmS0L428u8LqZ2XVmtsLMlprZmGrE2RNFHNvnzGxdh88vVUvMmVl/M5trZi+Y2XIzOyjv9dR+dlDU8aX28zOzYR3ibjGzt83s23n7xP75xbnITpZ9C1gO7NzJ63e5+zcqGE8cDnP3zsZFHw18Mvw6ALgh/J4WXR0bwBPuPqFi0ZTXvwMPuPtJZvYRoE/e62n/7Lo7Pkjp5+fuLwKjIPiHJ9AG/DZvt9g/P10pRGRmDcCxwC+qHUsVHQ/82gNPAf3NbLdqB1XrzGxn4LMEy+Di7v/j7mvzdkvtZ1fk8WXFEcDL7p5fqBv756ekEN21wPeAD7rY58Tw0m6umQ2pUFzl5MBDZrbIzKYUeH0wsLLDdmvYlgbdHRvAQWb2jJndb2b7VjK4Eu0BrAZ+Gd7e/IWZ7Zi3T5o/u2KOD9L7+XV0GjC7QHvsn5+SQgRmNgF4w90XdbHbvUCju48EHgZurUhw5TXO3ccQXKqea2afzXu90DKraRnG1t2xLSaYDuDTwM+A+ZUOsATbA2OAG9x9NPAucGHePmn+7Io5vjR/fgCEt8WOA37p5q7IAAAERElEQVRT6OUCbWX9/JQUohkHHGdmrwB3Aoeb2e0dd3D3Ne7+frh5EzC2siGWzt1Xhd/fILinuX/eLq1AxyugBmBVZaIrTXfH5u5vu/v68OffA/VmtmvFA+2ZVqDV3ReG23MJ/ojm75PKz44iji/ln1+7o4HF7v7fBV6L/fNTUojA3ae5e4O7NxJc3j3q7md23Cfv/t5xBA+kU8PMdjSzvu0/A/8MPJu32z3AV8KREAcC69z99QqHGlkxx2ZmHzczC3/en+D/kTWVjrUn3P3/ASvNbFjYdATwfN5uqfzsoLjjS/Pn18HpFL51BBX4/DT6qAzMbDrQ7O73AOeZ2XHAZuBNYFI1Y+uBfwJ+G/5/tT1wh7s/YGZnA7j7jcDvgWOAFcAG4GtVijWqYo7tJOAcM9sMbARO83RVeH4TmBXegvgv4GsZ+ezadXd8qf78zKwP8Hng/3Roq+jnp4pmERHJ0e0jERHJUVIQEZEcJQUREclRUhARkRwlBRERyVFSEIkonImz0Ay5BdvL8PtOMLN9OmwvMLPMrkcs1aWkIJJ8JwD7dLuXSBkoKUjmhJXLvwsnRXvWzE4N28ea2R/DyfAebK8+D//lfa2ZPRnuv3/Yvn/YtiT8Pqyr31sghlvM7K/h+48P2yeZ2Twze8DMXjKzn3R4z2Qz+1sYz01mdr2ZfYagMn6GBXPs7xnufrKZPR3uf0iZTp2IKpolk44CVrn7sQBm1s/M6gkmSDve3VeHieJy4KzwPTu6+2fCCfJuAYYDLwCfdffNZnYk8GPgxCJjuIhgGpSzzKw/8LSZPRy+NgoYDbwPvGhmPwO2AP9CMJfPO8CjwDPu/qSZ3QPc5+5zw+MB2N7d9zezY4BLgCN7cqJE8ikpSBYtA642s6sI/pg+YWbDCf7Q/yH8o1oHdJwzZjaAuz9uZjuHf8j7Area2ScJZqKsjxDDPxNMnnhBuN0LGBr+/Ii7rwMws+eBTwC7An909zfD9t8An+qi/3nh90VAY4S4RLqkpCCZ4+5/M7OxBHPEXGFmDxHMiPqcux/U2dsKbP8IeMzdv2hmjcCCCGEYcGK4mtbWRrMDCK4Q2m0h+P+w0JTIXWnvo/39ImWhZwqSOWY2CNjg7rcDVxPcknkRGGjhmr5mVm/bLsDS/tzhYIKZJ9cB/QiWRIToExs+CHyzw4ydo7vZ/2ngUDP7qJltz7a3qd4huGoRiZ3+hSFZNILgwewHwCbgHHf/HzM7CbjOzPoR/Ld/LfBc+J63zOxJgnW3258z/ITg9tH5BPf4o/hR2P/SMDG8AnS6brC7t5nZj4GFBPPjPw+sC1++E7jJzM4jmAVUJDaaJVVqnpktAC5w9+Yqx7GTu68PrxR+C9zi7vkLt4vESrePRJLjUjNrIVj45++kcClJST9dKYiISI6uFEREJEdJQUREcpQUREQkR0lBRERylBRERCRHSUFERHL+P0QLAH48MFxhAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.scatter(df[:50]['sepal length'], df[:50]['sepal width'], label='0')\n",
    "plt.scatter(df[50:100]['sepal length'], df[50:100]['sepal width'], label='1')\n",
    "plt.plot(test_point[0], test_point[1], 'bo', label='test_point')\n",
    "plt.xlabel('sepal length')\n",
    "plt.ylabel('sepal width')\n",
    "plt.legend()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# scikitlearn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.neighbors import KNeighborsClassifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',\n",
       "           metric_params=None, n_jobs=1, n_neighbors=5, p=2,\n",
       "           weights='uniform')"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "clf_sk = KNeighborsClassifier()\n",
    "clf_sk.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.0"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "clf_sk.score(X_test, y_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "### sklearn.neighbors.KNeighborsClassifier\n",
    "\n",
    "- n_neighbors: 临近点个数\n",
    "- p: 距离度量\n",
    "- algorithm: 近邻算法，可选{'auto', 'ball_tree', 'kd_tree', 'brute'}\n",
    "- weights: 确定近邻的权重"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### kd树"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "# kd-tree每个结点中主要包含的数据结构如下 \n",
    "class KdNode(object):\n",
    "    def __init__(self, dom_elt, split, left, right):\n",
    "        self.dom_elt = dom_elt  # k维向量节点(k维空间中的一个样本点)\n",
    "        self.split = split      # 整数（进行分割维度的序号）\n",
    "        self.left = left        # 该结点分割超平面左子空间构成的kd-tree\n",
    "        self.right = right      # 该结点分割超平面右子空间构成的kd-tree\n",
    " \n",
    " \n",
    "class KdTree(object):\n",
    "    def __init__(self, data):\n",
    "        k = len(data[0])  # 数据维度\n",
    "        \n",
    "        def CreateNode(split, data_set): # 按第split维划分数据集exset创建KdNode\n",
    "            if not data_set:    # 数据集为空\n",
    "                return None\n",
    "            # key参数的值为一个函数，此函数只有一个参数且返回一个值用来进行比较\n",
    "            # operator模块提供的itemgetter函数用于获取对象的哪些维的数据，参数为需要获取的数据在对象中的序号\n",
    "            #data_set.sort(key=itemgetter(split)) # 按要进行分割的那一维数据排序\n",
    "            data_set.sort(key=lambda x: x[split])\n",
    "            split_pos = len(data_set) // 2      # //为Python中的整数除法\n",
    "            median = data_set[split_pos]        # 中位数分割点             \n",
    "            split_next = (split + 1) % k        # cycle coordinates\n",
    "            \n",
    "            # 递归的创建kd树\n",
    "            return KdNode(median, split, \n",
    "                          CreateNode(split_next, data_set[:split_pos]),     # 创建左子树\n",
    "                          CreateNode(split_next, data_set[split_pos + 1:])) # 创建右子树\n",
    "                                \n",
    "        self.root = CreateNode(0, data)         # 从第0维分量开始构建kd树,返回根节点\n",
    "\n",
    "\n",
    "# KDTree的前序遍历\n",
    "def preorder(root):  \n",
    "    print (root.dom_elt)  \n",
    "    if root.left:      # 节点不为空\n",
    "        preorder(root.left)  \n",
    "    if root.right:  \n",
    "        preorder(root.right)      "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 对构建好的kd树进行搜索，寻找与目标点最近的样本点：\n",
    "from math import sqrt\n",
    "from collections import namedtuple\n",
    "\n",
    "# 定义一个namedtuple,分别存放最近坐标点、最近距离和访问过的节点数\n",
    "result = namedtuple(\"Result_tuple\", \"nearest_point  nearest_dist  nodes_visited\")\n",
    "  \n",
    "def find_nearest(tree, point):\n",
    "    k = len(point) # 数据维度\n",
    "    def travel(kd_node, target, max_dist):\n",
    "        if kd_node is None:     \n",
    "            return result([0] * k, float(\"inf\"), 0) # python中用float(\"inf\")和float(\"-inf\")表示正负无穷\n",
    " \n",
    "        nodes_visited = 1\n",
    "        \n",
    "        s = kd_node.split        # 进行分割的维度\n",
    "        pivot = kd_node.dom_elt  # 进行分割的“轴”\n",
    "        \n",
    "        if target[s] <= pivot[s]:           # 如果目标点第s维小于分割轴的对应值(目标离左子树更近)\n",
    "            nearer_node  = kd_node.left     # 下一个访问节点为左子树根节点\n",
    "            further_node = kd_node.right    # 同时记录下右子树\n",
    "        else:                               # 目标离右子树更近\n",
    "            nearer_node  = kd_node.right    # 下一个访问节点为右子树根节点\n",
    "            further_node = kd_node.left\n",
    " \n",
    "        temp1 = travel(nearer_node, target, max_dist)  # 进行遍历找到包含目标点的区域\n",
    "        \n",
    "        nearest = temp1.nearest_point       # 以此叶结点作为“当前最近点”\n",
    "        dist = temp1.nearest_dist           # 更新最近距离\n",
    "        \n",
    "        nodes_visited += temp1.nodes_visited  \n",
    " \n",
    "        if dist < max_dist:     \n",
    "            max_dist = dist    # 最近点将在以目标点为球心，max_dist为半径的超球体内\n",
    "            \n",
    "        temp_dist = abs(pivot[s] - target[s])    # 第s维上目标点与分割超平面的距离\n",
    "        if  max_dist < temp_dist:                # 判断超球体是否与超平面相交\n",
    "            return result(nearest, dist, nodes_visited) # 不相交则可以直接返回，不用继续判断\n",
    "            \n",
    "        #----------------------------------------------------------------------  \n",
    "        # 计算目标点与分割点的欧氏距离  \n",
    "        temp_dist = sqrt(sum((p1 - p2) ** 2 for p1, p2 in zip(pivot, target)))     \n",
    "        \n",
    "        if temp_dist < dist:         # 如果“更近”\n",
    "            nearest = pivot          # 更新最近点\n",
    "            dist = temp_dist         # 更新最近距离\n",
    "            max_dist = dist          # 更新超球体半径\n",
    "        \n",
    "        # 检查另一个子结点对应的区域是否有更近的点\n",
    "        temp2 = travel(further_node, target, max_dist) \n",
    "        \n",
    "        nodes_visited += temp2.nodes_visited\n",
    "        if temp2.nearest_dist < dist:        # 如果另一个子结点内存在更近距离\n",
    "            nearest = temp2.nearest_point    # 更新最近点\n",
    "            dist = temp2.nearest_dist        # 更新最近距离\n",
    " \n",
    "        return result(nearest, dist, nodes_visited)\n",
    " \n",
    "    return travel(tree.root, point, float(\"inf\"))  # 从根节点开始递归"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 例3.2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[7, 2]\n",
      "[5, 4]\n",
      "[2, 3]\n",
      "[4, 7]\n",
      "[9, 6]\n",
      "[8, 1]\n"
     ]
    }
   ],
   "source": [
    "data = [[2,3],[5,4],[9,6],[4,7],[8,1],[7,2]]\n",
    "kd = KdTree(data)\n",
    "preorder(kd.root)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "from time import clock\n",
    "from random import random\n",
    "\n",
    "# 产生一个k维随机向量，每维分量值在0~1之间\n",
    "def random_point(k):\n",
    "    return [random() for _ in range(k)]\n",
    " \n",
    "# 产生n个k维随机向量 \n",
    "def random_points(k, n):\n",
    "    return [random_point(k) for _ in range(n)]     "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Result_tuple(nearest_point=[2, 3], nearest_dist=1.8027756377319946, nodes_visited=4)\n"
     ]
    }
   ],
   "source": [
    "ret = find_nearest(kd, [3,4.5])\n",
    "print (ret)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "time:  7.299844505209247 s\n",
      "Result_tuple(nearest_point=[0.10505669630674175, 0.49542598718931097, 0.8033166919543026], nearest_dist=0.007582362181450973, nodes_visited=53)\n"
     ]
    }
   ],
   "source": [
    "N = 400000\n",
    "t0 = clock()\n",
    "kd2 = KdTree(random_points(3, N))            # 构建包含四十万个3维空间样本点的kd树\n",
    "ret2 = find_nearest(kd2, [0.1,0.5,0.8])      # 四十万个样本点中寻找离目标最近的点\n",
    "t1 = clock()\n",
    "print (\"time: \",t1-t0, \"s\")\n",
    "print (ret2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
